package Spark原理.物理图_RDD放置在集群中运行

import org.apache.spark.{SparkConf, SparkContext}
import org.junit.Test

// NOTE(review): class name should be UpperCamelCase (`WordCount`); kept as-is
// to avoid breaking any external references to this test class.
class wordCount {

  /**
   * Word-count smoke test: builds a local 6-thread Spark job that splits three
   * sentences into words, counts occurrences per word via reduceByKey, and
   * prints each result as "word,count".
   */
  @Test
  def test(): Unit = {

    val conf = new SparkConf().setMaster("local[6]").setAppName("wordCounts")
    val sc   = new SparkContext(conf)

    // Always stop the context, even if the job throws — otherwise Spark's
    // threads, UI server, and event loop leak past the end of the test.
    try {
      val data      = sc.parallelize(Seq("hadoop spark", "spark hadoop", "hadoop spark"))
      val splitRDD  = data.flatMap(_.split(" "))
      val mapRDD    = splitRDD.map((_, 1))          // pair each word with an initial count of 1
      val reduceRDD = mapRDD.reduceByKey(_ + _)     // sum counts per word
      val strRDD    = reduceRDD.map(item => s"${item._1},${item._2}")

      // collect() pulls results to the driver; fine here since the data is tiny
      strRDD.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }

}
